#This script estimates the synthetic DiD for Egypt and Tunisia
#It produces Figure I1 in the paper
library(missRanger)
library(synthdid)
library(ggplot2)
library(ggthemes)
library(dplyr)
library(nlme)
library(AICcmodavg)
library(stringr)
library(boot)
library(ISOweek)

#Bring in Egypt and Tunisia data
dir_name <- paste0("data/output/cos_sims_mastun/")
tunisia_source <- readRDS(paste0(dir_name, "turess", "/cos_simsdf_all_bysource.rds"))
egypt_source <- readRDS(paste0(dir_name, "masress", "/cos_simsdf_all_bysource.rds"))

# Generate country unit ID
tunisia_source$country_ID <- "Tunisia"
egypt_source$country_ID <- "Egypt"

#Append together
appended_df <- rbind(tunisia_source, egypt_source)

#Get list of unique newspapers
unique_newspapers <- unique(appended_df$newspaper)

#Define years and weeks of observation period
years <- 2011:2019
weeks <- 1:52

#create week year newspaper panel dataset
panel_df <- expand.grid(
  newspaper = unique_newspapers,
  year = years,
  week = weeks
)

#merge in country data
filtered_df <- appended_df[!duplicated(appended_df$newspaper), c("newspaper", "country_ID")]

panel_df <- merge(panel_df, filtered_df, by = "newspaper", all.x = TRUE)

# Create date using ISO week
panel_df$iso_week <- sprintf("%d-W%02d", panel_df$year, panel_df$week)
panel_df$date <- ISOweek2date(paste0(panel_df$iso_week, "-1"))

#merge together
appended_df$group <- as.Date(appended_df$group)

analysis_df <- merge(
  panel_df,
  appended_df[, c("group", "newspaper", "val")],
  by.x = c("date", "newspaper"),
  by.y = c("group", "newspaper"),
  all.x = TRUE
)

#create newspaper week running time variable
analysis_df <- analysis_df %>%
  arrange(newspaper, date) %>%
  group_by(newspaper) %>%
  mutate(running_time = row_number()) %>%
  ungroup()

#create coup treatment indicator
analysis_df <- analysis_df %>%
  mutate(coup_treatment = ifelse(running_time >= 131 & country_ID == "Egypt", 1, 0))

#make panel unit
analysis_df$newspaper <- as.factor(analysis_df$newspaper)

#random forest MI for missing DV values
im_df <- missRanger(analysis_df, num.trees = 100)

#subset to unit, time, outcome, and treatment
subset_df <- im_df %>%
  select(newspaper, running_time, val, coup_treatment)
subset_df <- as.data.frame(subset_df)

#shape data for synthdid
setup = panel.matrices(subset_df, unit = "newspaper", 
                       time = "running_time",  
                       outcome = "val", 
                       treatment = "coup_treatment")

#estimates                  
tau.hat = synthdid_estimate(setup$Y, setup$N0, setup$T0)
tau.hat
print(summary(tau.hat))

#compare with other estimators
tau.sc   = sc_estimate(setup$Y, setup$N0, setup$T0)
tau.did  = did_estimate(setup$Y, setup$N0, setup$T0)
estimates = list(tau.did, tau.sc, tau.hat)
names(estimates) = c('Diff-in-Diff', 'Synthetic Control', 'Synthetic Diff-in-Diff')

#Figure 4
gg <- synthdid_plot(estimates[["Synthetic Diff-in-Diff"]])

gg + theme_tufte(base_family = "Helvetica") +
  scale_color_manual(values = c("#E69F00", "#56B4E9")) +
  theme(panel.grid.major = element_line(colour = "grey", size = .1),
        legend.position=c(.15,.2),
        legend.direction="vertical",
        legend.title = element_blank(),
        panel.border = element_rect(colour = "black", fill=NA),
        legend.background = element_blank(),
        legend.box.background = element_rect(colour = "black"),
        axis.text.x = element_text(size=15),
        axis.text.y = element_text(size=15),
        axis.title.x = element_text(size=10),
        axis.title.y = element_text(size=10),
        legend.text=element_text(size=10),
        plot.background = element_rect(fill = "white", colour = NA),
        panel.grid.minor = element_line(size = 0.1, linetype = "solid"),
        strip.text = element_text(size = 15)) +
  ylim(-0.1, 0.15) + 
  ylab("Cosine similarity, leaders : opposition index") +
  xlab("Weeks since the start of democratic transition") +
  geom_vline(xintercept = 130, linetype="dashed", color = "grey", size=1)

ggsave("plots/fig4.png", 
       width=200, height = 125, 
       dpi=300, units="mm", bg = "white")

#standard deviation of val in pre-coup period
sd_value <- sd(im_df$val[im_df$country_ID == "Egypt" & im_df$coup_treatment == 0])

###########################################
# Appendix Figure I.1
#interrupted time series using weekly average of medcrit scores
#for Egyptian newspapers
###########################################

its_df <- im_df %>% 
  filter(country_ID == "Egypt")

df_collapsed <- its_df %>%
  group_by(running_time) %>%
  summarise(dv_avg = mean(val, na.rm = TRUE)) %>%
  ungroup()

df_collapsed$coup <- ifelse(df_collapsed$running_time >= 131, 1, 0)

df_collapsed <- df_collapsed %>%
  mutate(post_intervention_time = ifelse(running_time >= 131, 
                                         row_number() - min(row_number()[running_time >= 131]) + 1, 
                                         0))

#simple model to estimate autocorrelation lags
model.a = gls(dv_avg ~ running_time + coup + post_intervention_time, data = df_collapsed,method="ML")
summary(model.a)

#graph it
df_collapsed<-df_collapsed %>% mutate(
  model.a.predictions = predictSE.gls (model.a, df_collapsed, se.fit=T)$fit,
  model.a.se = predictSE.gls (model.a, df_collapsed, se.fit=T)$se
)

ggplot(df_collapsed,aes(running_time,dv_avg))+
  geom_ribbon(aes(ymin = model.a.predictions - (1.96*model.a.se), ymax = model.a.predictions + (1.96*model.a.se)), fill = "lightgreen")+
  geom_line(aes(running_time,model.a.predictions),color="black",lty=1)+
  geom_point(alpha=0.3)

#take into account autocorrelation
mod.1 = dv_avg ~ running_time + coup + post_intervention_time

fx = function(pval,qval){summary(gls(mod.1, data = df_collapsed, correlation= corARMA(p=pval,q=qval, form = ~ running_time),method="ML"))$AIC}

p = summary(gls(mod.1, data = df_collapsed,method="ML"))$AIC
message(str_c ("AIC Uncorrelated model = ",p))

autocorrel = expand.grid(pval = 0:2, qval = 0:2)

for(i in 2:nrow(autocorrel)){p[i] = try(summary(gls(mod.1, data = df_collapsed, correlation= corARMA(p=autocorrel$pval[i],q=autocorrel$qval[i], form = ~ running_time),method="ML"))$AIC)}

autocorrel<- autocorrel %>%
  mutate(AIC = as.numeric(p)) %>%
  arrange(AIC)

#Pval = 0; qval = 1
autocorrel

model.b = gls(dv_avg ~ running_time + coup + post_intervention_time, data = df_collapsed,method="ML", correlation= corARMA(p=0,q=1, form = ~ running_time))
coefficients(model.a)
coefficients(model.b)

df_collapsed<- df_collapsed %>% 
  mutate(
    model.b.predictions = predictSE.gls (model.b, df_collapsed, se.fit=T)$fit,
    model.b.se = predictSE.gls (model.b, df_collapsed, se.fit=T)$se
  )

df_null<-filter(df_collapsed,running_time<131)
model.c = gls(dv_avg ~ running_time, data = df_null, correlation= corARMA(p=0, q=1, form = ~ running_time),method="ML")
coefficients(model.a)
coefficients(model.c)

summary(model.b)

df_collapsed<-df_collapsed %>% mutate(
  model.c.predictions = predictSE.gls (model.c, newdata = df_collapsed, se.fit=T)$fit,
  model.c.se = predictSE.gls (model.c, df_collapsed, se.fit=T)$se
)

#Figure I.1
ggplot(df_collapsed,aes(running_time,dv_avg))+
  geom_ribbon(aes(ymin = model.c.predictions - (1.96*model.c.se), ymax = model.c.predictions + (1.96*model.c.se)), fill = "lightcoral", alpha = 0.2)+
  geom_line(aes(running_time,model.c.predictions),color="lightcoral",lty=2, alpha = 0.2)+
  geom_ribbon(aes(ymin = model.b.predictions - (1.96*model.b.se), ymax = model.b.predictions + (1.96*model.b.se)), fill = "lightcoral")+
  geom_line(aes(running_time,model.b.predictions),color="red",lty=1)+
  geom_point(alpha=0.1) +
  theme_bw() +
  ylab("Cosine similarity, leaders : opposition index") +
  xlab("Weeks since the start of democratic transition") +
  geom_vline(xintercept = 130, linetype="dashed", color = "grey", size=1)

ggsave("plots/figI1.png", 
       width=200, height = 125, 
       dpi=300, units="mm", bg = "white")
